load data: 224 subjects, with 788 goals are included in the following analysis

goalRating_long_R <- read.csv("./inputs/goalRating_long_R.csv",stringsAsFactors = F)

indivDiffDf <- read.csv("./inputs/indivDiffDf.csv",stringsAsFactors = F)

goalDf_sum_wide <- read.csv("./inputs/goalDf_wide.csv",stringsAsFactors = F)

Data Screening for goal representation assessment

Missing data

Check the number of missing data per variable, and below is the top 5 variables. Missing data is rare for all variables

# check the number of "I'm not sure" responses per variable
totalGoal <- nrow(goalRating_long_R)/39

goalRating_long_R %>%
  filter(is.na(rating)) %>%
  tabyl(variable) %>%
  mutate(percent = n/totalGoal) %>%
  arrange(desc(percent)) %>%
  head(5)
##         variable n     percent
##       commitment 6 0.007614213
##       importance 5 0.006345178
##  instrumentality 5 0.006345178
##           regret 5 0.006345178
##         conflict 4 0.005076142

The “I’m not sure” response

“construal_level”,“approach_avoidance” and “attainment_maintenance” question have an option for “I’m not sure” because they ask subjects to categorize their goals.

around 2% of the goals had “I’m not sure” as the response.

# check the number of "I'm not sure" responses per varialbe
goalRating_long_R %>%
  filter(rating == 99) %>%
  tabyl(variable) %>%
  mutate(percent = n/totalGoal) %>%
  arrange(desc(percent))
##                  variable  n    percent
##           construal_level 22 0.02791878
##      approach_avoidance_R 15 0.01903553
##  attainment_maintenance_R 15 0.01903553

The “not specified” response

temporal_duration, frequency and end_state_specificity question have an option for “not specified” because they ask about features that may not be applicable to all goals.

The end state specificity is not applicable to around 10% of the goals

# check the number of "not specified" responses per varialbe
goalRating_long_R %>%
  filter(rating == 999) %>%
  tabyl(variable) %>%
  mutate(percent = n/totalGoal) %>%
  arrange(desc(percent))
##                 variable  n    percent
##  end_state_specificity_R 81 0.10279188
##        temporal_duration 42 0.05329949
##              frequency_R 21 0.02664975

Transform all special cases to NAs

All “I’m not sure” and “not specified” responses will be treated as missing data.

# transform 99 & 999 to NAs
goalRating_long_R <- goalRating_long_R %>% 
  mutate(rating = replace(rating, rating == 99 | rating == 999, NA))

The number of claimed goals

Descriptive on the number of goals subject claimed to have prior to listing them (in the SONA study, the median of claimed goal is 3)

describe(goalDf_sum_wide$total_goal)
##    vars   n mean    sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 224 4.37 13.69      3    3.03 1.48   1 200   199 13.22   183.95 0.91

Visualize the number of claimed goals per subject after excluding the extreme value (> 50) (we have 1 claimed 50, 1 claimed 200)

breaks = (1:20)
goalDf_sum_wide %>% 
  filter(total_goal < 50) %>%
  ggplot(aes(x = total_goal)) + 
  scale_x_continuous(labels=scales::comma(breaks, accuracy = 1), breaks=breaks) + 
  geom_histogram(fill = "orange", 
                 colour = "black",
                 binwidth = 1) + 
  labs(x="Number of claimed goals", y="# of participants") +
  theme_classic(base_size = 18) 

The percentage of subjects who claimed having more than 5 goals: 6.25%

# get the number of total subject
totalSub <- nrow(indivDiffDf)

length(goalDf_sum_wide$total_goal[goalDf_sum_wide$total_goal>5])/totalSub
## [1] 0.0625

Descriptive on the number of goals participants actual listed (in the SONA study, the mean is 3.52)

describe(goalDf_sum_wide$listNum)
##    vars   n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 224 3.52 1.37      3    3.61 1.48   1   5     4 -0.24    -1.36 0.09
breaks <- (1:5)
goalDf_sum_wide %>% 
  ggplot(aes(x = listNum)) + 
  scale_x_continuous(labels=scales::comma(breaks, accuracy = 1), breaks=seq(1, 5, by = 1)) + 
  geom_histogram(fill = "orange", 
                 colour = "black",
                 binwidth = 1) + 
  labs(x="Number of listed goals", y="# of participants") +
  theme_classic(base_size = 18) 

number of people who listed 1 goal: 15 (SONA study: 1)

length(goalDf_sum_wide$listNum[goalDf_sum_wide$listNum == 1])
## [1] 15

descriptive on the differences between the number of claimed goals and listed goals (after exclude the 2 extreme cases)

goalDf_sum_wide <-goalDf_sum_wide %>%
  mutate(diffNum = total_goal - listNum)

goalDf_sum_wide_clean <- goalDf_sum_wide %>%filter(total_goal < 50)
  
describe(goalDf_sum_wide_clean$diffNum)
##    vars   n  mean sd median trimmed mad min max range skew kurtosis   se
## X1    1 222 -0.22  2      0   -0.24   0  -4  15    19 4.37    31.27 0.13
breaks <- (-4:15)
goalDf_sum_wide_clean %>% 
  ggplot(aes(x = diffNum)) + 
  scale_x_continuous(labels=scales::comma(breaks, accuracy = 1), breaks=breaks) + 
  geom_histogram(fill = "orange", 
                 colour = "black",
                 binwidth = 1) + 
  labs(x="Number of claimed goals - listed goals", y="# of participants") +
  theme_classic(base_size = 18) 

percentage of people who listed more goals than they claimed: 21.875%

length(goalDf_sum_wide$diffNum[goalDf_sum_wide$diffNum <0])/totalSub *100
## [1] 21.875

percentage of people who listed less goals more than they claimed: 7.5%

length(goalDf_sum_wide$diffNum[goalDf_sum_wide$diffNum >0])/totalSub *100
## [1] 7.589286

Compared to the SONA study, more people listed more goals than they claimed, which may indicate a priming effect of the goal listing task.

Goal Representation Ratings

Descriptive stats

# descriptive stats for each variable 
goalRating_long_R %>%
  dplyr::select(variable, rating) %>%
  group_by(variable) %>%
  summarize(mean = mean(rating, na.rm = TRUE),
            sd = sd(rating, na.rm = TRUE), 
            n = n(),
            min = min(rating, na.rm = TRUE),
            max = max(rating, na.rm = TRUE),
            skew = skew(rating, na.rm = T), 
            kurtosi = kurtosi(rating, na.rm = T)
            ) %>%
  arrange(skew) %>%
  kable(format = "html", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center")
## `summarise()` ungrouping output (override with `.groups` argument)
variable mean sd n min max skew kurtosi
specificity 5.704459 1.4407669 788 1 7 -1.3872095 1.7286873
ideal_motivation 5.766201 1.3322105 788 1 7 -1.2862967 1.7244908
clarity 5.795426 1.1862569 788 1 7 -1.2448730 1.9444376
identified_motivation 6.024112 1.0803441 788 1 7 -1.2313316 1.7416065
initial_time_R 6.513959 1.5936845 788 1 8 -1.2151952 1.1675351
importance 6.033206 1.1051707 788 1 7 -1.1553897 1.3625448
basic_needs 5.440204 1.5904530 788 1 7 -1.1414223 0.8365728
control 5.963151 1.1535604 788 1 7 -1.1202376 1.1152625
social_desirability 5.933926 1.1135096 788 1 7 -1.0791729 1.2900521
attractiveness_achievement 5.955527 1.0309729 788 1 7 -1.0174535 1.1152611
commitment 5.941177 1.1446553 788 1 7 -0.9952599 0.6455821
commonality 5.395172 1.5358737 788 1 7 -0.9587350 0.4018835
attainability 8.484076 2.0171620 788 1 11 -0.9280890 0.7132788
instrumentality 5.365262 1.5511464 788 1 7 -0.9184357 0.2736961
attractiveness_progress 5.656489 1.1134682 788 1 7 -0.9097796 1.0867679
regret 5.361430 1.5704737 788 1 7 -0.8853851 0.1836925
measurability 5.624682 1.4152406 788 1 7 -0.8838137 0.1848673
meaningfulness 5.271684 1.5284160 788 1 7 -0.7857683 0.1570127
temporal_duration 3.089933 0.9353960 788 1 4 -0.7301076 -0.4445260
construal_level 4.985583 1.8021719 788 1 7 -0.6515892 -0.5443425
visibility 4.961735 1.7792805 788 1 7 -0.6501947 -0.5088387
approach_avoidance_R 5.041451 2.3474670 788 1 7 -0.6469084 -1.2699673
affordance 5.138677 1.4311744 788 1 7 -0.5928686 -0.1145568
difficulty 5.369759 1.3180100 788 1 7 -0.5879299 -0.1261859
external_importance 4.656489 1.8761839 788 1 7 -0.5398822 -0.7832015
effort 5.071429 1.5026584 788 1 7 -0.5139455 -0.4310914
urgency 4.975796 1.5061693 788 1 7 -0.4734348 -0.3103415
introjected_motivation 4.349428 2.0261984 788 1 7 -0.4184255 -1.1083868
intrinsic_motivation 4.450255 2.0133843 788 1 7 -0.4000731 -1.1012062
connectedness 4.506378 1.8822177 788 1 7 -0.3800438 -0.9544725
external_motivation 4.127226 2.0943592 788 1 7 -0.2888726 -1.3180909
advancement 6.292621 2.9098334 788 1 11 -0.1095035 -1.1142280
ought_motivation 3.888183 2.1443339 788 1 7 -0.0876986 -1.4386476
procrastination 4.034395 1.8280940 788 1 7 -0.0807747 -1.1777999
attainment_maintenance_R 4.002591 2.4737257 788 1 7 0.0565910 -1.6714239
frequency_R 1.473890 0.4996441 788 1 2 0.1043766 -1.9917005
end_state_specificity_R 1.934566 0.8923464 788 1 3 0.1279772 -1.7339052
conflict 3.502551 2.0279008 788 1 7 0.1669974 -1.3336501
failure 1.705210 0.8646350 788 1 3 0.6051053 -1.3934385
# order based on their skewness 
#kable(varDf[order(varDf$skew),])

The trend showed in these histograms are very similar to the SONA study

# histograms for each dimension
goalRating_long_R %>%
  ggplot(aes(x = rating)) +
    geom_histogram(fill   = "orange",
                   colour = "black",
                   alpha  = .6) +
    facet_wrap(~variable, nrow = 7)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

correlational matrix across all variables

“pairwise.complete.obs” is used for generating correlation matrix.The correlations make sense

# transform the long format to short format
goalDf_wide <- goalRating_long_R %>% spread (variable, rating)

# generate a correctional matrix
corrM_all <- goalDf_wide %>% 
  dplyr :: select(advancement:visibility) %>% 
  cor(use = "pairwise.complete.obs")

# visualization
corrplot(corrM_all, method = "circle",number.cex = .7, order = "AOE", addCoef.col = "black",type = "upper",col= colorRampPalette(c("midnightblue","white", "orange"))(200))

### Variance Partition

Only the 31 variables for goal representation are included. Only around 8.4% of the variance is on the between subject level.

# subset the long format dataset for only the 31 goal representation variable
goal_striving <- c("commitment", "urgency", "effort", "advancement", "initial_time_R", "regret", "procrastination", "failure")
goalDf_R_long <- goalRating_long_R[!goalRating_long_R$variable %in% goal_striving,]

# generate a multilevel model with subject as the random intercept
mlm <-lmer(rating ~ variable + (1|MTurkCode), data = goalDf_R_long)

# calculate the variance partition coefficient and transform to ICC
VarCorr(mlm) %>%
  as_tibble() %>%
  mutate(icc=vcov/sum(vcov)) %>%
  dplyr :: select(grp, icc)
## # A tibble: 2 x 2
##   grp          icc
##   <chr>      <dbl>
## 1 MTurkCode 0.0844
## 2 Residual  0.916
Raw <- VarCorr(mlm) %>%
  as_tibble() %>%
  mutate(Raw=vcov/sum(vcov)) %>%
  dplyr :: select(Raw)

Data transformation

26 variables are included. Ordinal variables are not included: “temporal_duration” & “end_state_specificity” and “frequency”; appoach_avoidance_R & attainment_maintainance_R are also dropped because these 2 variables are more relevant to the phrasing/content of a goal than the perception of a goal. This step is consistent with the SONA study

# Exclude the 8 variables related to goal striving progress
goalDf_R_wide <- goalDf_wide[,!names(goalDf_wide) %in% goal_striving]

# Exclude 5 goal representation variables and other columns with irrelevant data
goal_exclude <- c("temporal_duration", "end_state_specificity_R", "frequency_R", "attainment_maintenance_R", "approach_avoidance_R")
goalDf_EFA <- goalDf_R_wide[,!names(goalDf_R_wide) %in% goal_exclude]
goalDf_EFA <- subset(goalDf_EFA, select = affordance : visibility)

# Generate a correctional matrix 
corrM_raw <- cor(goalDf_EFA, use = "pairwise")

evaluate the number of factors

# use Very Simple Structure criterion
res_vss <- psych :: nfactors(corrM_raw, n = 10, rotate = "promax", diagonal = FALSE, fm = "minres", 
n.obs=788,title="Very Simple Structure",use="pairwise",cor="cor")

# select useful parameters and organize them into a table
cbind(1:10, res_vss$map) %>%
  as_tibble() %>%
  rename(., factor = V1, map = V2) %>%
  cbind(., res_vss$vss.stats) %>%
  select(factor, map, fit, complex, eChisq, SRMR, eCRMS, eBIC, eRMS) %>%
  kable(format = "html", escape = F, caption = "VSS output after dropping 2 variables") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
VSS output after dropping 2 variables
factor map fit complex eChisq SRMR eCRMS eBIC eRMS
1 0.0300601 0.5531095 1.000000 8651.22574 0.1299628 0.1354956 6657.0458 0.1299628
2 0.0147079 0.6562891 1.257165 2616.62404 0.0714745 0.0778426 789.1816 0.0714745
3 0.0133261 0.7147054 1.417630 1323.81523 0.0508386 0.0579650 -343.5593 0.0508386
4 0.0126501 0.7256847 1.586970 691.25321 0.0367366 0.0439569 -822.7229 0.0367366
5 0.0140773 0.7347253 1.733995 459.75518 0.0299601 0.0377232 -907.4919 0.0299601
6 0.0162208 0.7256312 1.932734 327.39498 0.0252823 0.0336008 -899.7927 0.0252823
7 0.0186402 0.7072665 1.952297 229.98864 0.0211901 0.0298300 -863.8090 0.0211901
8 0.0222787 0.6824480 2.081200 162.70862 0.0178232 0.0266835 -804.3686 0.0178232
9 0.0250941 0.6711510 2.084255 118.70666 0.0152236 0.0243533 -728.3196 0.0152236
10 0.0295462 0.6765429 1.948696 87.87141 0.0130980 0.0225138 -645.7734 0.0130980
# Use the Scree plot to identify the number of factors have Eigenvalues >1 and the output from the Parallel analysis

ev <- eigen(corrM_raw)
ap <- parallel(subject=nrow(goalDf_EFA),var=ncol(goalDf_EFA),
  rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)

### Extract factors

Extract number of factors based on the suggestions above. Because we expect factors to be correlated with each other, we use “promax” rotation.

# extract 4 factors
fa_raw_4 <-fa(r=corrM_raw, nfactors=4,n.obs = 788, rotate="promax", SMC=FALSE, fm="minres")

# extract 5 factors
fa_raw_5 <-fa(r=corrM_raw, nfactors=5,n.obs = 788, rotate="promax", SMC=FALSE, fm="minres")

# extract 6 factors
fa_raw_6 <-fa(r=corrM_raw, nfactors=6,n.obs = 788, rotate="promax", SMC=FALSE, fm="minres")

Compare loadings for each model

4 factors

fa.diagram(fa_raw_4)

5 factors

fa.diagram(fa_raw_5)

6 factors

fa.diagram(fa_raw_6)

5 factors

Compared to the 5 factors yield from the SONA study, the factor “measurability” is combined with “attainability”, and the factor ideal is new. It’s composed by item “ideal_motivation” (used to be in factor “importance”), “Control”(“measurability”), “meaningfulness”(“importance”)

fa.sort(fa_raw_5)
## Factor Analysis using method =  minres
## Call: fa(r = corrM_raw, nfactors = 5, n.obs = 788, rotate = "promax", 
##     SMC = FALSE, fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                              MR1   MR2   MR3   MR4   MR5   h2   u2 com
## ought_motivation            0.82 -0.28  0.05  0.29 -0.03 0.71 0.29 1.5
## external_motivation         0.77 -0.14  0.05  0.22 -0.18 0.61 0.39 1.4
## conflict                    0.77 -0.29  0.01  0.08  0.06 0.59 0.41 1.3
## external_importance         0.69  0.26  0.06  0.02 -0.23 0.59 0.41 1.5
## connectedness               0.57  0.08  0.04 -0.03  0.09 0.39 0.61 1.1
## visibility                  0.52  0.23  0.17 -0.08 -0.16 0.38 0.62 1.9
## construal_level             0.46  0.22 -0.15 -0.06  0.18 0.37 0.63 2.1
## intrinsic_motivation        0.42 -0.12  0.06 -0.19  0.35 0.40 0.60 2.6
## attractiveness_achievement -0.15  0.74 -0.02 -0.07  0.10 0.51 0.49 1.1
## importance                 -0.01  0.69  0.11  0.00  0.02 0.53 0.47 1.1
## identified_motivation      -0.09  0.61  0.10  0.07  0.02 0.44 0.56 1.1
## attractiveness_progress     0.01  0.52  0.17  0.00  0.16 0.44 0.56 1.4
## instrumentality             0.28  0.50 -0.06  0.00  0.10 0.43 0.57 1.7
## difficulty                  0.25  0.27 -0.16  0.11  0.05 0.20 0.80 3.1
## clarity                    -0.08  0.01  0.76  0.08  0.04 0.60 0.40 1.0
## attainability               0.15  0.04  0.59 -0.04  0.04 0.42 0.58 1.2
## measurability               0.02  0.03  0.56  0.10 -0.07 0.33 0.67 1.1
## affordance                  0.24 -0.10  0.44 -0.08  0.11 0.29 0.71 1.9
## specificity                -0.01  0.14  0.41 -0.05 -0.08 0.21 0.79 1.4
## commonality                 0.13 -0.01  0.03  0.57  0.05 0.36 0.64 1.1
## introjected_motivation      0.40 -0.14 -0.01  0.49  0.13 0.41 0.59 2.3
## basic_needs                 0.13  0.29 -0.01  0.45 -0.01 0.42 0.58 1.9
## social_desirability        -0.15  0.40  0.08  0.41  0.00 0.46 0.54 2.3
## ideal_motivation           -0.08  0.34 -0.15  0.15  0.59 0.51 0.49 2.0
## control                    -0.33  0.07  0.30  0.15  0.37 0.34 0.66 3.3
## meaningfulness              0.27  0.23  0.02 -0.05  0.37 0.39 0.61 2.6
## 
##                        MR1  MR2  MR3  MR4  MR5
## SS loadings           3.98 2.99 1.94 1.33 1.09
## Proportion Var        0.15 0.12 0.07 0.05 0.04
## Cumulative Var        0.15 0.27 0.34 0.39 0.44
## Proportion Explained  0.35 0.26 0.17 0.12 0.10
## Cumulative Proportion 0.35 0.62 0.79 0.90 1.00
## 
##  With factor correlations of 
##      MR1  MR2  MR3   MR4   MR5
## MR1 1.00 0.24 0.08  0.11  0.27
## MR2 0.24 1.00 0.34  0.36  0.25
## MR3 0.08 0.34 1.00  0.02  0.23
## MR4 0.11 0.36 0.02  1.00 -0.07
## MR5 0.27 0.25 0.23 -0.07  1.00
## 
## Mean item complexity =  1.7
## Test of the hypothesis that 5 factors are sufficient.
## 
## The degrees of freedom for the null model are  325  and the objective function was  8.9 with Chi Square of  6922.66
## The degrees of freedom for the model are 205  and the objective function was  0.86 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  788 with the empirical chi square  459.76  with prob <  1.3e-21 
## The total number of observations was  788  with Likelihood Chi Square =  665.53  with prob <  4.5e-50 
## 
## Tucker Lewis Index of factoring reliability =  0.889
## RMSEA index =  0.053  and the 90 % confidence intervals are  0.049 0.058
## BIC =  -701.71
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy             
##                                                    MR1  MR2  MR3  MR4  MR5
## Correlation of (regression) scores with factors   0.95 0.92 0.88 0.83 0.81
## Multiple R square of scores with factors          0.89 0.85 0.78 0.69 0.65
## Minimum correlation of possible factor scores     0.79 0.70 0.56 0.37 0.31
# visualization
loadings <- fa.sort(fa_raw_5)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("ought", "importance", "attainability", "commonality", "ideal")
loadings$Variables <- rownames(loadings)
loadings.m <- loadings %>% gather(-Variables, key = "Factor", value = "Loading")
colOrder <- c("ought", "importance", "attainability", "commonality", "ideal")
rowOrder <- rev(rownames(loadings))
loadings.m<- arrange(mutate(loadings.m,Variables=factor(Variables,leve=rowOrder)),Variables)
loadings.m<- arrange(mutate(loadings.m,Factor=factor(Factor,leve=colOrder)),Factor)

ggplot(loadings.m, aes(Variables, abs(Loading), fill=Loading)) + 
  facet_wrap(~ Factor, nrow=1) + #place the factors in separate facets
  geom_bar(stat="identity") + #make the bars
  coord_flip() + #flip the axes so the test names can be horizontal  
  #define the fill color gradient: blue=positive, red=negative
  scale_fill_gradient2(name = "Loading", 
                       high = "orange", mid = "white", low = "midnightblue", 
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") + #improve y-axis label + 
  ggtitle("Loadings for 5 factors") + 
  theme_bw(base_size=10)

The 5 factor loadings from the SONA study:

SONA 5-factor

interfactor correlation

fa_raw_5$Phi %>% 
  as.tibble() %>% 
  dplyr::rename(ought = MR1, importance = MR2, attainability = MR3, commonality = MR4, ideal = MR5) %>%
  round(.,2) %>%
  remove_rownames() %>%
  mutate(factor = colnames(.)) %>%
  select(factor, everything()) %>%
  kable(format = "html", escape = F, caption = "Interfactor Correlation") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
## Warning: `as.tibble()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
Interfactor Correlation
factor ought importance attainability commonality ideal
ought 1.00 0.24 0.08 0.11 0.27
importance 0.24 1.00 0.34 0.36 0.25
attainability 0.08 0.34 1.00 0.02 0.23
commonality 0.11 0.36 0.02 1.00 -0.07
ideal 0.27 0.25 0.23 -0.07 1.00

6 factors

factor loadings

Compared to the 6 factors yield from the SONA study, the “instrumentality” is replaced by the factor “ideal”.

fa.sort(fa_raw_6)
## Factor Analysis using method =  minres
## Call: fa(r = corrM_raw, nfactors = 6, n.obs = 788, rotate = "promax", 
##     SMC = FALSE, fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                              MR1   MR2   MR4   MR3   MR6   MR5   h2   u2 com
## conflict                    0.90 -0.30 -0.04 -0.06  0.16  0.00 0.66 0.34 1.3
## ought_motivation            0.81 -0.29  0.27  0.13 -0.02 -0.08 0.71 0.29 1.6
## external_motivation         0.75 -0.15  0.18  0.04  0.06 -0.21 0.60 0.40 1.4
## connectedness               0.58  0.09 -0.06  0.06  0.05  0.03 0.39 0.61 1.1
## introjected_motivation      0.52 -0.14  0.41 -0.11  0.08  0.13 0.43 0.57 2.4
## external_importance         0.51  0.27  0.10  0.23 -0.11 -0.30 0.65 0.35 2.9
## intrinsic_motivation        0.47 -0.08 -0.22  0.18 -0.02  0.26 0.39 0.61 2.5
## construal_level             0.46  0.25 -0.07  0.00 -0.13  0.10 0.37 0.63 1.9
## visibility                  0.36  0.23 -0.01  0.28 -0.02 -0.22 0.41 0.59 3.4
## difficulty                  0.35  0.30  0.00 -0.31  0.09  0.01 0.27 0.73 3.1
## attractiveness_achievement -0.17  0.78 -0.07 -0.11  0.04  0.08 0.52 0.48 1.2
## importance                 -0.07  0.71  0.02  0.02  0.07  0.02 0.53 0.47 1.0
## identified_motivation      -0.14  0.63  0.10  0.01  0.06  0.03 0.44 0.56 1.2
## attractiveness_progress     0.01  0.55 -0.01  0.06  0.14  0.14 0.44 0.56 1.3
## instrumentality             0.26  0.52 -0.01 -0.05 -0.01  0.06 0.42 0.58 1.5
## commonality                 0.11 -0.05  0.65  0.09 -0.11  0.10 0.41 0.59 1.2
## social_desirability        -0.19  0.40  0.46  0.01  0.00  0.05 0.46 0.54 2.4
## basic_needs                 0.13  0.29  0.45 -0.09  0.01  0.02 0.42 0.58 2.0
## attainability               0.05  0.03  0.04  0.55  0.22  0.05 0.46 0.54 1.4
## affordance                  0.15 -0.11  0.00  0.52  0.09  0.10 0.33 0.67 1.4
## measurability               0.12  0.03 -0.01  0.08  0.65 -0.04 0.45 0.55 1.1
## specificity                 0.05  0.17 -0.14  0.04  0.48 -0.08 0.27 0.73 1.5
## clarity                    -0.09  0.03  0.08  0.44  0.47  0.08 0.57 0.43 2.2
## ideal_motivation           -0.01  0.38  0.15  0.00 -0.20  0.53 0.51 0.49 2.3
## control                    -0.24  0.10  0.12  0.15  0.18  0.40 0.33 0.67 2.9
## meaningfulness              0.27  0.26 -0.03  0.18 -0.11  0.30 0.40 0.60 4.0
## 
##                        MR1  MR2  MR4  MR3  MR6  MR5
## SS loadings           3.90 3.14 1.34 1.33 1.21 0.94
## Proportion Var        0.15 0.12 0.05 0.05 0.05 0.04
## Cumulative Var        0.15 0.27 0.32 0.37 0.42 0.46
## Proportion Explained  0.33 0.26 0.11 0.11 0.10 0.08
## Cumulative Proportion 0.33 0.59 0.71 0.82 0.92 1.00
## 
##  With factor correlations of 
##       MR1  MR2   MR4   MR3   MR6   MR5
## MR1  1.00 0.34  0.13  0.23 -0.14  0.07
## MR2  0.34 1.00  0.35  0.33  0.24  0.19
## MR4  0.13 0.35  1.00 -0.03  0.21 -0.12
## MR3  0.23 0.33 -0.03  1.00  0.24  0.08
## MR6 -0.14 0.24  0.21  0.24  1.00  0.14
## MR5  0.07 0.19 -0.12  0.08  0.14  1.00
## 
## Mean item complexity =  1.9
## Test of the hypothesis that 6 factors are sufficient.
## 
## The degrees of freedom for the null model are  325  and the objective function was  8.9 with Chi Square of  6922.66
## The degrees of freedom for the model are 184  and the objective function was  0.69 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.03 
## 
## The harmonic number of observations is  788 with the empirical chi square  327.39  with prob <  4e-10 
## The total number of observations was  788  with Likelihood Chi Square =  535.07  with prob <  5.6e-36 
## 
## Tucker Lewis Index of factoring reliability =  0.906
## RMSEA index =  0.049  and the 90 % confidence intervals are  0.044 0.054
## BIC =  -692.12
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    MR1  MR2  MR4  MR3  MR6  MR5
## Correlation of (regression) scores with factors   0.95 0.93 0.84 0.83 0.83 0.79
## Multiple R square of scores with factors          0.90 0.86 0.70 0.69 0.69 0.62
## Minimum correlation of possible factor scores     0.79 0.73 0.40 0.38 0.37 0.25
# visualization
loadings <- fa.sort(fa_raw_6)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("ought", "importance", "commonality", "attainability", "measurability", "ideal")
loadings$Variables <- rownames(loadings)
loadings.m <- loadings %>% gather(-Variables, key = "Factor", value = "Loading")
colOrder <- c("ought", "importance", "commonality", "attainability", "measurability", "ideal")
rowOrder <- rev(rownames(loadings))
loadings.m<- arrange(mutate(loadings.m,Variables=factor(Variables,leve=rowOrder)),Variables)
loadings.m<- arrange(mutate(loadings.m,Factor=factor(Factor,leve=colOrder)),Factor)

ggplot(loadings.m, aes(Variables, abs(Loading), fill=Loading)) + 
  facet_wrap(~ Factor, nrow=1) + #place the factors in separate facets
  geom_bar(stat="identity") + #make the bars
  coord_flip() + #flip the axes so the test names can be horizontal  
  #define the fill color gradient: blue=positive, red=negative
  scale_fill_gradient2(name = "Loading", 
                       high = "orange", mid = "white", low = "midnightblue", 
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") + #improve y-axis label + 
  ggtitle("Loadings for 6 factors") + 
  theme_bw(base_size=10)

The 6 factor loadings from the SONA study:

SONA 6-factor #### interfactor correlation

fa_raw_6$Phi %>% 
  as.tibble() %>% 
  dplyr::rename(ought = MR1, importance = MR2, measurability = MR4, commonality = MR3, ideal = MR6, attainability = MR5) %>%
  round(.,2) %>%
  remove_rownames() %>%
  mutate(factor = colnames(.)) %>%
  select(factor, everything()) %>%
  kable(format = "html", escape = F, caption = "Interfactor Correlation") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F,position = "center",fixed_thead = T)
Interfactor Correlation
factor ought importance measurability commonality ideal attainability
ought 1.00 0.34 0.13 0.23 -0.14 0.07
importance 0.34 1.00 0.35 0.33 0.24 0.19
measurability 0.13 0.35 1.00 -0.03 0.21 -0.12
commonality 0.23 0.33 -0.03 1.00 0.24 0.08
ideal -0.14 0.24 0.21 0.24 1.00 0.14
attainability 0.07 0.19 -0.12 0.08 0.14 1.00

Compare model fit & complexity

# generate a dataframe 
fa_fitDf <- data.frame(factors = c(5,6),
                        chi = c(fa_raw_5$chi,fa_raw_6$chi),
                        BIC = c(fa_raw_5$BIC,fa_raw_6$BIC),
                        fit = c(fa_raw_5$fit,fa_raw_6$fit),
                        RMSEA = c(fa_raw_5$RMSEA[1],fa_raw_6$RMSEA[1]),
                       cumVar = c(max(fa_raw_5$Vaccounted[3,]), max(fa_raw_6$Vaccounted[3,])),
                        complexity = c(mean(fa_raw_5$complexity),mean(fa_raw_6$complexity)))

fa_fitDf
##   factors      chi       BIC       fit      RMSEA    cumVar complexity
## 1       5 459.7552 -701.7141 0.8522999 0.05337864 0.4359856   1.733994
## 2       6 327.3950 -692.1190 0.8654053 0.04919028 0.4565280   1.932734